%matplotlib inline
%reload_ext autoreload
%autoreload 2Timor Leste Model Rollout Part 1 (Compute Per-country Populated Grids)
Import packages
import sys
sys.path.append("../")
from povertymapping.rollout_grids import get_region_filtered_bingtile_grids, compute_raster_stats
from geowrangler.raster_zonal_stats import create_raster_zonal_stats
import geopandas as gpd
import pandas as pd
import numpy as np
from tqdm import tqdm
import rasterio as rio
from rasterstats import zonal_statsSet global parameters
REGION = 'indonesia'
ADMIN_LVL = 'ADM2'Generate/Cache/Get per country grids
get_region_filtered_bingtile_grids?Signature:
get_region_filtered_bingtile_grids(
region: str,
admin_lvl='ADM2',
quadkey_lvl=14,
use_cache=True,
cache_dir='~/.cache/geowrangler',
filter_population=True,
assign_grid_admin_area=True,
metric_crs='epsg:3857',
extra_args={'nodata': nan},
max_batch_size=None,
n_workers=None,
) -> geopandas.geodataframe.GeoDataFrame
Docstring:
Get a geodataframe consisting of bing tile grids for a region/country at a quadkey level.
By default, the grids are filtered by population
Arguments:
region: (required) the country/region for which grids will be created
admin_lvl: (default: ADM2) the administrative level boundaries used for assigning the grids
quadkey_lvl: (default: 14) the bingtile grid size zoom level
use_cache: (default: True) whether to use a cached version or overwrite existing file
cache_dir: (default: '~/.cache/geowrangler') directory where grids geojson will be created
filter_population: (default: True) - whether to filter out grids with zero population counts
assign_grid_admin_area: (default: True) whether to merge the admin level area data to the grids data
metric_crs: (default: 'epsg:3857') - CRS to use for assigning for admin areas
extra_args: (default: dict(nodata=np.nan)) - extra arguments passed to raster zonal stats computing
max_batch_size: (default:None) - set batch size to limit memory used for raster zonal stats
n_workers: (default:None) - set number of workers to parallelize raster zonal stats computation per batch
File: ~/project_repos/unicef-ai4d-poverty-mapping/povertymapping/rollout_grids.py
Type: function
%%time
admin_grids_gdf = get_region_filtered_bingtile_grids(
REGION,
admin_lvl=ADMIN_LVL,
filter_population=False
)2023-03-07 11:35:16.979 | INFO | povertymapping.rollout_grids:get_region_filtered_bingtile_grids:225 - Loading cached grids file /home/jc_tm/.cache/geowrangler/quadkey_grids/indonesia_14_ADM2_admin_grids.geojson
CPU times: user 23.2 s, sys: 1.55 s, total: 24.8 s
Wall time: 24.8 s
Explore per country populated grids
admin_grids_gdf.head()| quadkey | shapeName | shapeISO | shapeID | shapeGroup | shapeType | geometry | |
|---|---|---|---|---|---|---|---|
| 0 | 31000101131223 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.28369 -0.52734, 98.28369 -0.50536... |
| 1 | 31000101133001 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.28369 -0.54931, 98.28369 -0.52734... |
| 2 | 31000101131232 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.30566 -0.52734, 98.30566 -0.50536... |
| 3 | 31000101133010 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.30566 -0.54931, 98.30566 -0.52734... |
| 4 | 31000101131231 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.32764 -0.50536, 98.32764 -0.48339... |
admin_grids_gdf.info()<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 340122 entries, 0 to 340121
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 quadkey 340122 non-null object
1 shapeName 340122 non-null object
2 shapeISO 340122 non-null object
3 shapeID 340122 non-null object
4 shapeGroup 340122 non-null object
5 shapeType 340122 non-null object
6 geometry 340122 non-null geometry
dtypes: geometry(1), object(6)
memory usage: 18.2+ MB
admin_grids_gdf.crs<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
Inspect HDX file
from povertymapping.hdx import get_hdx_file
hdx_pop_file = get_hdx_file(REGION)
hdx_pop_filePath('/home/jc_tm/.cache/geowrangler/hdx/idn_general_2020.tif')
(hdx_pop_file.stat().st_size) / 1e987.714317324
hdx_pop_file.is_file()True
Compute Population Per Grid
For this calculation, we will calculate the population, batched based on groups calculated from the quadkey. By removing the last digits of the quadkey, we are able to get the “coarser” quadkey to which that tile belongs to. This grouping ensures that the tile groupings are geographically close to one another, which optimizes the raster window that we need to pull to calculate the population count.
admin_grids_gdf['quadkey_group'] = admin_grids_gdf['quadkey'].str[:-6]admin_grids_gdf| quadkey | shapeName | shapeISO | shapeID | shapeGroup | shapeType | geometry | quadkey_group | |
|---|---|---|---|---|---|---|---|---|
| 0 | 31000101131223 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.28369 -0.52734, 98.28369 -0.50536... | 31000101 |
| 1 | 31000101133001 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.28369 -0.54931, 98.28369 -0.52734... | 31000101 |
| 2 | 31000101131232 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.30566 -0.52734, 98.30566 -0.50536... | 31000101 |
| 3 | 31000101133010 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.30566 -0.54931, 98.30566 -0.52734... | 31000101 |
| 4 | 31000101131231 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.32764 -0.50536, 98.32764 -0.48339... | 31000101 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 340117 | 13233323210212 | Raja Ampat | None | IDN-ADM2-3_0_0-B422 | IDN | ADM2 | POLYGON ((131.17676 0.57128, 131.17676 0.59325... | 13233323 |
| 340118 | 13233323210230 | Raja Ampat | None | IDN-ADM2-3_0_0-B422 | IDN | ADM2 | POLYGON ((131.17676 0.54931, 131.17676 0.57128... | 13233323 |
| 340119 | 13233323210213 | Raja Ampat | None | IDN-ADM2-3_0_0-B422 | IDN | ADM2 | POLYGON ((131.19873 0.57128, 131.19873 0.59325... | 13233323 |
| 340120 | 13233323210231 | Raja Ampat | None | IDN-ADM2-3_0_0-B422 | IDN | ADM2 | POLYGON ((131.19873 0.54931, 131.19873 0.57128... | 13233323 |
| 340121 | 13233323210320 | Raja Ampat | None | IDN-ADM2-3_0_0-B422 | IDN | ADM2 | POLYGON ((131.22070 0.54931, 131.22070 0.57128... | 13233323 |
340122 rows × 8 columns
# Demonstrate how the quadkey grouping gives us geographically close grids
quadkey_groups = list(admin_grids_gdf['quadkey_group'].unique())
i = 9
test_group = quadkey_groups[i]
group_gdf = admin_grids_gdf[admin_grids_gdf['quadkey_group'] == test_group]
group_gdf.explore()Make this Notebook Trusted to load map: File -> Trust Notebook
aggregation=dict(
column='population',
output='pop_count',
func='sum')
extra_args=dict(nodata=np.nan)Test on 1k grids
admin_grids_pop_count = compute_raster_stats(
admin_grids_gdf.iloc[:1_000],
hdx_pop_file,
aggregation=aggregation,
extra_args=extra_args,
group_col="quadkey_group",
max_batch_size=None,
n_workers=None,
)
admin_grids_filtered = admin_grids_pop_count[admin_grids_pop_count["pop_count"] > 0]
admin_grids_filtered.explore()2023-03-07 15:00:09.586 | INFO | povertymapping.rollout_grids:compute_raster_stats:68 - Creating raster zonal stats for 1000 grids for file size 87714.317324 Mb, batched in 5 unique group/s from quadkey_group
2023-03-07 15:00:09.591 | WARNING | povertymapping.rollout_grids:compute_raster_stats:71 - When batching by group, output gdf rows will be ordered based on the group.
100%|██████████| 5/5 [01:46<00:00, 21.22s/it]
2023-03-07 15:01:55.731 | INFO | povertymapping.rollout_grids:compute_raster_stats:86 - Completed raster zonal stats for 5 groups
2023-03-07 15:01:55.793 | INFO | povertymapping.rollout_grids:compute_raster_stats:88 - Concatenated raster zonal stats for 5 groups
Make this Notebook Trusted to load map: File -> Trust Notebook
Full run
admin_grids_pop_count = compute_raster_stats(
admin_grids_gdf,
hdx_pop_file,
aggregation=aggregation,
extra_args=extra_args,
group_col="quadkey_group",
max_batch_size=None,
n_workers=None,
)admin_grids_filtered = admin_grids_pop_count[admin_grids_pop_count["pop_count"] > 0]
admin_grids_filtered = admin_grids_filtered.reset_index(drop=True)
admin_grids_filtered.info()<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 161230 entries, 0 to 161229
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 quadkey 161230 non-null object
1 shapeName 161230 non-null object
2 shapeISO 161230 non-null object
3 shapeID 161230 non-null object
4 shapeGroup 161230 non-null object
5 shapeType 161230 non-null object
6 geometry 161230 non-null geometry
7 quadkey_group 161230 non-null object
8 pop_count 161230 non-null float64
dtypes: float64(1), geometry(1), object(7)
memory usage: 11.1+ MB
admin_grids_filtered.head()| quadkey | shapeName | shapeISO | shapeID | shapeGroup | shapeType | geometry | quadkey_group | pop_count | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 31000101131232 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.30566 -0.52734, 98.30566 -0.50536... | 31000101 | 185.680420 |
| 1 | 31000101131302 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.34961 -0.48339, 98.34961 -0.46142... | 31000101 | 244.316330 |
| 2 | 31000101113301 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.37158 -0.28564, 98.37158 -0.26367... | 31000101 | 175.907745 |
| 3 | 31000101113323 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.37158 -0.35156, 98.37158 -0.32959... | 31000101 | 214.998383 |
| 4 | 31000101131101 | Nias Selatan | None | IDN-ADM2-3_0_0-B371 | IDN | ADM2 | POLYGON ((98.37158 -0.37353, 98.37158 -0.35156... | 31000101 | 78.181221 |
admin_grids_filtered.head(20000).explore()Make this Notebook Trusted to load map: File -> Trust Notebook